*Data prep*

* Load the raw wealth extract, make the year variable numeric and
* restrict the panel to the years 1980-2018.
use "Data\Core_datasets\wealth_from_SAS", clear
destring aar, replace
* inrange() is false for a missing year, exactly like the pair of inequalities
keep if inrange(aar, 1980, 2018)
compress, nocoalesce


* Collapse duplicate person-year rows: keep one row per (pnr, aar) whose
* wealth is the mean of wealth over all rows of that person-year.
bysort pnr aar: egen wealth_new = mean(wealth)
bysort pnr aar: keep if _n == 1
drop wealth
rename wealth_new wealth
sort pnr aar

*deflate
* Remove common calendar-year movements from wealth, with 2000 as base year.
* The year effects are the coefficients of year dummies in a regression of
* log wealth; log() is missing for wealth <= 0, so those rows do not enter
* the regression but are still deflated with their year's coefficient.
gen log_wealth = log(wealth)
reg log_wealth ib2000.aar

* Attach each year's coefficient to the rows of that year (base year = 0).
* The sample was restricted to 1980-2018 above, so this range covers all rows.
gen year_effects = .
replace year_effects = 0 if aar == 2000
forvalues y = 1980/2018 {
	if `y' != 2000 {
		replace year_effects = _b[`y'.aar] if aar == `y'
	}
}

* The year effects are estimated on the log scale, so taking them out is a
* multiplication by exp(-effect). The previous factor (1 - effect) is only
* the first-order approximation of that: it is inaccurate for large effects
* and flips the sign of wealth whenever an effect exceeds 1.
gen wealth_deflated = wealth * exp(-year_effects)
drop log_wealth year_effects

*make parental variables
* For every person-year, look up the deflated wealth of the father and of the
* mother in the same year. Rows with alder<=35 are keyed on the parent's id
* (far_nr / mor_nr); rows with alder>35 are keyed on their own pnr, so a
* parent's own row lands in the same (id, aar) group as the children's rows.
gen obs_far = 1 if !missing(far_nr)
gen obs_mor = 1 if !missing(mor_nr)

destring alder, replace

local targets dad mom
local k = 0
foreach parent in far mor {
	local ++k
	local who : word `k' of `targets'

	* matching key: parent's id for the young, own id for everyone else
	gen `parent'_id_temp = ""
	replace `parent'_id_temp = `parent'_nr if !missing(obs_`parent') & alder <= 35
	replace `parent'_id_temp = pnr if alder > 35

	tempvar grp_size own_wealth grp_wealth

	* number of rows sharing this key in this year
	bysort `parent'_id_temp aar: gen `grp_size' = _N

	* the parent's own wealth, only where the group holds more than one row
	by `parent'_id_temp aar: gen `own_wealth' = wealth_deflated ///
		if pnr == `parent'_id_temp & `parent'_id_temp != "" & `grp_size' > 1

	* spread it over the group and keep it on every row except the parent's own
	by `parent'_id_temp aar: egen `grp_wealth' = max(`own_wealth')
	gen `who'_wealth_deflated = `grp_wealth' if pnr != `parent'_id_temp

	drop `parent'_id_temp `grp_size' `own_wealth' `grp_wealth'
}

*Save data
* Keep the identifiers and the four wealth variables, ordered by person-year.
* NOTE(review): this writes to the same file that the data-prep section loads
* at its start, so the raw extract is replaced by the processed one and the
* section cannot be re-run without restoring the original file.
keep pnr aar wealth wealth_deflated dad_wealth_deflated mom_wealth_deflated
sort pnr aar
save "Data\Core_datasets\wealth_from_SAS.dta", replace



**Get parental wealth**

* Load the analysis sample and remove variables that are not used below.
use "Data\sample1_new.dta", clear

drop year1 year2 year3 year4 year5 year9 year10 year11 ///
	start_wage_ambition ten_wage_ambition old_ambition ///
	individual_wage_growth_ambition extreme_ambition ///
	wage10_ambition_temp n_wage10_ambition_group ///
	wage_start_ambition_temp n_wage_start_ambition_group ///
	wage10_ambition wage_start_ambition ///
	n_wage10_individual_ambition n_wage_start_individual_ambition ///
	wage10_mean_ambition wage_start_mean_ambition wage_growth_ambition

*Merge on wealth variable*
* Person-years that appear only in the wealth file are not kept.
sort pnr aar
merge 1:1 pnr aar using "Data\Core_datasets\wealth_from_SAS", keep(master match) nogenerate


*Get parental wealth at final_educ_year
* Sum of father's and mother's deflated wealth in the year aar==final_educ_year,
* copied to every row of the person. The sum is missing when either parent's
* wealth is missing, and max() ignores missing values.
bysort pnr: egen parental_wealth = max(cond(aar == final_educ_year, dad_wealth_deflated + mom_wealth_deflated, .))

*Group by final_educ*
*get group variables
* Mean parental wealth within each final_educ group, computed over the rows
* with individual==1 (groups with final_educ missing or equal to 1 get no
* value) and assigned to every row of the group.
bysort final_educ: egen p_wealth = mean(cond(final_educ != . & final_educ != 1 & individual == 1, parental_wealth, .))



*Fix problem with some large "out-dated" educational programs
* For each grad_region 81-85, rows with one of the listed old codes take the
* p_wealth of a region-specific donor code (1109<region> or 1110<region>),
* and their final_educ is rewritten to <old code><region>.
* If the donor code has no rows, r(max) is missing and p_wealth is set to
* missing, which is also what the earlier gen/egen version produced.

forvalues i = 81/85 {

	*9th grade
	summarize p_wealth if final_educ == 1109`i', meanonly
	replace p_wealth = r(max) if inlist(final_educ, 1007, 1008, 1023, 1123, 1009, 1022) & grad_region == `i'

	* Every code whose p_wealth was overwritten above must also be recoded.
	* The earlier version recoded 1107 but not 1007, so 1007 rows carried a
	* regional p_wealth under a non-regional code. 1007 is now recoded too;
	* the 1107 recode is retained so rows with that code behave as before.
	* NOTE(review): confirm whether 1107 is a real code or a typo for 1007.
	foreach code in 1007 1107 1008 1023 1123 1009 1022 {
		replace final_educ = `code'`i' if final_educ == `code' & grad_region == `i'
	}

	*10th grade
	summarize p_wealth if final_educ == 1110`i', meanonly
	replace p_wealth = r(max) if final_educ == 1010 & grad_region == `i'

	replace final_educ = 1010`i' if final_educ == 1010 & grad_region == `i'

}

*3.g
* Rows with final_educ 1097 take the p_wealth of the rows with final_educ 1198
* (the largest one if they differ; missing if there are no such rows).
summarize p_wealth if final_educ == 1198, meanonly
replace p_wealth = r(max) if final_educ == 1097


**kmeans**
*standardize variables
* z-score of p_wealth using the mean and sd over all rows in memory
sum p_wealth
sca the_mean_s=r(mean)
sca the_sd_s=r(sd)
gen p_wealth_s=(p_wealth-the_mean_s)/the_sd_s
sum p_wealth_s

* start(krandom(1234)) draws the k starting observations from the data as
* currently ordered. At this point the order comes from earlier sorts on
* non-unique keys, whose tie order is arbitrary, so the seed alone does not
* pin down the starting centers. Sorting on the unique key (pnr, aar) first
* makes the clustering reproducible from run to run.
sort pnr aar

cluster kmeans p_wealth_s, k(4) name(wealth_ambition) start(krandom(1234))
tab wealth_ambition

tabstat p_wealth_s, by(wealth_ambition)

*SAVE*
save "Data\wealth_ambition.dta", replace

**Get final version**

* Continues with the data still in memory; the reload below is disabled.
*use "Data\wealth_ambition.dta"


*Define marital status
* relationship: civst is "G" or a faelle_nr is present
gen relationship = (civst == "G") | (faelle_nr != "")

* married: civst is "G"
gen married = (civst == "G")

* cohab: in a relationship without being married
gen cohab = (relationship == 1) & (married == 0)

*Make couple ID based on the PNR of the man
* Rows with koen "1" in a relationship use their own pnr; rows with koen "2"
* use aegte_nr when married and faelle_nr when cohabiting. Everyone else
* keeps the placeholder ".".
gen couple_id = "."
replace couple_id = faelle_nr if koen == "2" & cohab == 1
replace couple_id = aegte_nr if koen == "2" & married == 1
replace couple_id = pnr if koen == "1" & relationship == 1

*Keep only couples where we observe both partners
* Within a couple-year, average the numeric koen of the rows that are in a
* relationship. An average strictly between 1 and 2 means both a "1" and a
* "2" row are present. Rows not in a relationship are kept regardless.
bysort couple_id aar: egen koen_avg = mean(cond(relationship == 1, real(koen), .))
keep if relationship == 0 | (relationship == 1 & koen_avg > 1 & koen_avg < 2)
drop koen_avg

//generate partners' age
// Within each couple-year, age_male is the (largest) age among rows with
// koen "1" and age_female the (largest) age among rows with koen "2";
// both are written to every row of the couple-year.

bysort couple_id aar: egen age_male = max(cond(koen == "1", age, .))
bysort couple_id aar: egen age_female = max(cond(koen == "2", age, .))

*Age restriction
* Couples: both partners aged 19-60. Rows not in a relationship: own age 19-60.
* inrange() is false for a missing age, like the original pair of inequalities.
keep if (relationship == 1 & inrange(age_male, 19, 60) & inrange(age_female, 19, 60)) ///
	| (relationship == 0 & inrange(age, 19, 60))
* (original author's note: zero deleted)

*Only couples
keep if relationship == 1

drop individual

* Final dataset ordered by person-year.
sort pnr aar
save "Data\wealth_ambition_final.dta", replace
